******************************************************************************************
* Do-file name:	cr_data_siab_01.do 
* Task:         prepares siab data
* Last change:  17.10.2023 
* Notes:		Based on SIAB_R7519 data
******************************************************************************************



******************************************************************************************
*** program setup                                                                      
******************************************************************************************

* Interpret this do-file under Stata 17.0 syntax and semantics
version 17.0

* Reset the session: empty memory, remove all macros, and discard
* automatically loaded programs so the run starts from a clean state
clear all
macro drop _all
discard

* Output settings: no --more-- pauses, 90-character-wide log lines
set more off
set linesize 90
* set trace on

* Fix the random-number seed
set seed 123456789


******************************************************************************************
*** load dataset                                                                       
******************************************************************************************

** use siab data
* Path uses forward slashes only: Stata accepts "/" as the directory
* separator on every platform, whereas "\" works only on Windows (and can
* additionally be misread as an escape character in front of a macro).
use "data/orig/siab_r_7519_v1.dta", clear

** drop variables not needed
drop gleitz leih befrist tage_jung tage_alt

** restrict sample
* keep only records whose source code (quelle_gr) equals 1, i.e. BeH
keep if quelle_gr == 1


******************************************************************************************
*** rename vars, replace missings and drop old value labels                                                                       
******************************************************************************************

* Variables whose extended missing codes are collapsed to system missing
local recode_vars bnn deutsch ausbildung_gr ausbildung_imp schule tentgelt_gr ///
                  beruf_gr beruf2010_gr niveau teilzeit stib erwstat_gr       ///
                  grund_gr ao_region pendler w08_gen_gr

* For every listed variable, turn .z, .n and .a (in that order) into "."
foreach v of varlist `recode_vars' {
	foreach mcode in .z .n .a {
		replace `v' = .  if `v' == `mcode'
	}
}

*** detach value labels from bnn and the daily-wage variable
*** (done before renaming, so the original name tentgelt_gr is used here)
label values bnn tentgelt_gr .

*** rename variables: names in the first list map position by position
*** onto the names in the second list
rename (persnr   frau   gebjahr deutsch grund_gr tentgelt_gr beruf_gr stib     erwstat_gr ao_region quelle_gr) ///
       (vsnr_ano female gebj    native  grund    tag_entg    beruford berufstg pers_gr    ao_kreis  quelle)


******************************************************************************************
*** clean tag_entg                                                                          
******************************************************************************************

** BeH records (quelle == 1) are removed when tag_entg is either
** system missing or below zero
drop if quelle == 1 & (tag_entg == . | tag_entg < 0)


******************************************************************************************
*** create clean data (only employed individuals)                                                                      
******************************************************************************************

** keep employed individuals
keep if quelle == 1

** generate stichtagsdata
* year is taken from the end date of the episode; the reference date
* (stichtag) is 30 June of that year. Only episodes that cover the
* reference date are kept.
gen year=year(endepi)
gen stichtag=mdy(6,30,year)
format stichtag %td
keep if begepi <= stichtag & endepi >= stichtag
drop stichtag

** drop duplicates (obs. with highest income is kept)
* Within person-year the record with the highest tag_entg is sorted first.
* spell is added as a final sort key so that ties in tag_entg are broken
* the same way on every run: without it the order of tied records is
* arbitrary (set seed does not control sort order), and a different
* duplicate could survive from one run to the next.
* NOTE(review): assumes spell is unique within vsnr_ano -- confirm.
gsort vsnr_ano year -tag_entg spell
by vsnr_ano year: gen num = _n
keep if num == 1
drop num


******************************************************************************************
*** create and change variables                                                                        
******************************************************************************************

** age = observation year minus year of birth
generate alter = year - gebj


******************************************************************************************
*** prepare data                                                                        
******************************************************************************************

** remove the birth year and the episode/spell bookkeeping variables
drop gebj begorig endorig begepi endepi spell

** sort by person and year, then move the key variables to the front
sort vsnr_ano year
order vsnr_ano ao_kreis year alter female native tag_entg pers_gr quelle

** save basis data
* compress first so that each variable is stored in its smallest lossless type
compress
save "data/clean_work.dta", replace


*** End
exit
